reqpkg <- c("ggplot2","plotly","DESeq2","magrittr","dplyr","genefilter","AnnotationHub","org.Mm.eg.db","GO.db","vsn","pheatmap","biomaRt","curl","RColorBrewer","viridis","fgsea","tidyverse","DT","ggpubr")
for (i in reqpkg) {
suppressWarnings(suppressMessages(library(i, quietly=T, verbose=T, warn.conflicts=F,character.only=T)))
print(i)
}
## [1] "ggplot2"
## [1] "plotly"
## [1] "DESeq2"
## [1] "magrittr"
## [1] "dplyr"
## [1] "genefilter"
## [1] "AnnotationHub"
## [1] "org.Mm.eg.db"
## [1] "GO.db"
## [1] "vsn"
## [1] "pheatmap"
## [1] "biomaRt"
## [1] "curl"
## [1] "RColorBrewer"
## [1] "viridis"
## [1] "fgsea"
## [1] "tidyverse"
## [1] "DT"
## [1] "ggpubr"
theme_set(theme_pubr())
select <- dplyr::select
ensembl <- useMart("ensembl")
ensemblMouse <- useDataset("mmusculus_gene_ensembl", mart=ensembl)
mouseProteinCodingGenes <- getBM(
attributes=c("ensembl_gene_id","external_gene_name","description"),
filters='biotype',
values=c('protein_coding'),
mart=ensemblMouse)
df <- read.csv("combatSeq_corrected_data.csv") %>%
.[rowSums(sapply(., '%in%', mouseProteinCodingGenes$external_gene_name)) > 0,] %>%
set_rownames(.$X) %>%
select(-X)
sex_list <- c(rep("female", 16), rep("male", 16))
cond_list <- rep(c(rep("12wConv", 4), rep("4wConv", 4), rep("GF", 4), rep("SPF", 4)), 2)
condition_list <- data.frame(row.names=colnames(df), Sex=sex_list, Condition=cond_list)
condition_list$Sex <- relevel(condition_list$Sex, "female")
condition_list$Condition <- relevel(condition_list$Condition, "SPF")
dds <- DESeq2::DESeqDataSetFromMatrix(countData=df,
colData = condition_list,
design = ~ Sex + Condition + Sex:Condition)
head(dds)
## class: DESeqDataSet
## dim: 6 32
## metadata(1): version
## assays(1): counts
## rownames(6): Rp1 Sox17 ... Gm37988 Tcea1
## rowData names(0):
## colnames(32): MF.20 MF.24 ... MF.11 MF.12
## colData names(2): Sex Condition
dds <- estimateSizeFactors(dds)
colData(dds)
## DataFrame with 32 rows and 3 columns
## Sex Condition sizeFactor
## <factor> <factor> <numeric>
## MF.20 female 12wConv 1.08051709610664
## MF.24 female 12wConv 1.15107649494338
## MF.1 female 12wConv 0.946422330177653
## MF.3 female 12wConv 0.932181787801605
## MF.19 female 4wConv 1.10847475975552
## ... ... ... ...
## MF.9 male GF 0.924664868739107
## MF.27 male SPF 1.104719567798
## MF.28 male SPF 1.08550756866473
## MF.11 male SPF 0.971434598105476
## MF.12 male SPF 0.921528466633396
dds$Group <- factor(paste0(dds$Sex, dds$Condition), levels = c("femaleSPF", "female12wConv", "female4wConv", "femaleGF", "maleSPF", "male12wConv", "male4wConv", "maleGF"))
design(dds) <- ~ Group
vsd <- vst(dds, blind = TRUE)
head(assay(vsd), 3)
## MF.20 MF.24 MF.1 MF.3 MF.19 MF.21 MF.2
## Rp1 7.422555 7.680962 7.547485 7.549269 7.672037 7.676788 7.766229
## Sox17 9.069960 8.785837 8.699090 8.955951 8.563502 8.671979 8.832556
## Mrpl15 9.981710 10.275255 10.127250 10.101192 10.057358 10.175637 10.061974
## MF.4 MF.29 MF.30 MF.13 MF.14 MF.31 MF.32
## Rp1 7.755435 7.312500 7.689598 7.312500 7.580634 7.565477 7.506628
## Sox17 8.575799 9.062377 8.910226 8.818694 8.826154 8.573463 8.552819
## Mrpl15 10.127827 10.252663 10.409074 10.917596 10.180764 10.092134 10.227681
## MF.15 MF.16 MF.22 MF.23 MF.6 MF.8 MF.17
## Rp1 7.482708 7.430595 7.620154 7.684179 7.796634 7.574916 7.533806
## Sox17 8.628823 8.647058 8.619706 8.794454 8.821164 8.756439 8.775720
## Mrpl15 10.081052 10.095532 9.945673 10.196460 10.053388 10.025887 10.179902
## MF.18 MF.5 MF.7 MF.25 MF.26 MF.10 MF.9
## Rp1 7.687536 7.774401 7.598828 7.556825 7.721238 7.553993 7.648320
## Sox17 8.877565 8.525481 8.814587 8.790266 9.079339 8.802868 8.821464
## Mrpl15 10.095672 10.035899 10.079328 10.066285 10.108856 10.060285 10.111368
## MF.27 MF.28 MF.11 MF.12
## Rp1 7.655965 7.763498 7.678652 7.578663
## Sox17 8.691374 8.554596 8.908309 8.939845
## Mrpl15 9.911917 10.277160 10.167866 10.129378
colData(vsd)
## DataFrame with 32 rows and 4 columns
## Sex Condition sizeFactor Group
## <factor> <factor> <numeric> <factor>
## MF.20 female 12wConv 1.08051709610664 female12wConv
## MF.24 female 12wConv 1.15107649494338 female12wConv
## MF.1 female 12wConv 0.946422330177653 female12wConv
## MF.3 female 12wConv 0.932181787801605 female12wConv
## MF.19 female 4wConv 1.10847475975552 female4wConv
## ... ... ... ... ...
## MF.9 male GF 0.924664868739107 maleGF
## MF.27 male SPF 1.104719567798 maleSPF
## MF.28 male SPF 1.08550756866473 maleSPF
## MF.11 male SPF 0.971434598105476 maleSPF
## MF.12 male SPF 0.921528466633396 maleSPF
rld <- rlog(dds, blind=FALSE)
## rlog() may take a few minutes with 30 or more samples,
## vst() is a much faster transformation
ntd <- normTransform(dds)
meanSdPlot(assay(ntd))
meanSdPlot(assay(vsd))
meanSdPlot(assay(rld))
vst(dds[,1:16]) %>% plotPCA(intgroup=c("Sex","Condition")) + ggtitle("vst, female only")
vst(dds[,17:32]) %>% plotPCA(intgroup=c("Sex","Condition")) + ggtitle("vst, male only")
plotPCA(vsd, intgroup=c("Sex","Condition")) + ggtitle("vst")
plotPCA(vsd, intgroup=c("Sex","Condition")) + geom_text(aes(label=name),vjust=1)
plotPCA(ntd, intgroup=c("Sex","Condition")) + geom_text(aes(label=name),vjust=1) + ggtitle("nld")
plotPCA(rld, intgroup=c("Sex","Condition")) + geom_text(aes(label=name),vjust=1) + ggtitle("rld")
dds <- DESeq(dds)
## using pre-existing size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
#Compare SPF v GF in F
res_1 <- results(dds,contrast=c("Group", "femaleSPF", "femaleGF"), tidy = TRUE)
#Compare 4w vs 12w in F
res_2 <- results(dds, contrast=c("Group", "female4wConv", "female12wConv"), tidy = TRUE)
#Compare SPF vs 4w in F
res_3 <- results(dds, contrast=c("Group", "femaleSPF", "female4wConv"), tidy = TRUE)
#Compare SPF vs 12w in F
res_4 <- results(dds, contrast=c("Group", "femaleSPF", "female12wConv"), tidy = TRUE)
#Compare SPF v GF in M
res_5 <- results(dds,contrast=c("Group", "maleSPF", "maleGF"), tidy = TRUE)
#Compare 4w vs 12w in M
res_6 <- results(dds, contrast=c("Group", "male4wConv", "male12wConv"), tidy = TRUE)
#Compare SPF vs 4w in M
res_7 <- results(dds, contrast=c("Group", "maleSPF", "male4wConv"), tidy = TRUE)
#Compare SPF vs 12w in M
res_8 <- results(dds, contrast=c("Group", "maleSPF", "male12wConv"), tidy = TRUE)
sum(res_1$pvalue < 0.05, na.rm=TRUE)
## [1] 5656
sum(!is.na(res_1$pvalue))
## [1] 18742
sum(res_1$padj < 0.1, na.rm=TRUE)
## [1] 4795
sum(res_2$pvalue < 0.05, na.rm=TRUE)
## [1] 386
sum(!is.na(res_2$pvalue))
## [1] 18742
sum(res_2$padj < 0.1, na.rm=TRUE)
## [1] 4
sum(res_3$pvalue < 0.05, na.rm=TRUE)
## [1] 2676
sum(!is.na(res_3$pvalue))
## [1] 18742
sum(res_3$padj < 0.1, na.rm=TRUE)
## [1] 1068
sum(res_4$pvalue < 0.05, na.rm=TRUE)
## [1] 2501
sum(!is.na(res_4$pvalue))
## [1] 18742
sum(res_4$padj < 0.1, na.rm=TRUE)
## [1] 1028
sum(res_5$pvalue < 0.05, na.rm=TRUE)
## [1] 2287
sum(!is.na(res_5$pvalue))
## [1] 18742
sum(res_5$padj < 0.1, na.rm=TRUE)
## [1] 674
sum(res_6$pvalue < 0.05, na.rm=TRUE)
## [1] 875
sum(!is.na(res_6$pvalue))
## [1] 18742
sum(res_6$padj < 0.1, na.rm=TRUE)
## [1] 0
sum(res_7$pvalue < 0.05, na.rm=TRUE)
## [1] 1401
sum(!is.na(res_7$pvalue))
## [1] 18742
sum(res_7$padj < 0.1, na.rm=TRUE)
## [1] 168
sum(res_8$pvalue < 0.05, na.rm=TRUE)
## [1] 1738
sum(!is.na(res_8$pvalue))
## [1] 18742
sum(res_8$padj < 0.1, na.rm=TRUE)
## [1] 305
resultsNames(dds)
## [1] "Intercept" "Group_female12wConv_vs_femaleSPF"
## [3] "Group_female4wConv_vs_femaleSPF" "Group_femaleGF_vs_femaleSPF"
## [5] "Group_maleSPF_vs_femaleSPF" "Group_male12wConv_vs_femaleSPF"
## [7] "Group_male4wConv_vs_femaleSPF" "Group_maleGF_vs_femaleSPF"
condition_LFC <- lfcShrink(dds, coef = "Group_female12wConv_vs_femaleSPF", type = 'apeglm')
## using 'apeglm' for LFC shrinkage. If used in published research, please cite:
## Zhu, A., Ibrahim, J.G., Love, M.I. (2018) Heavy-tailed prior distributions for
## sequence count data: removing the noise and preserving large differences.
## Bioinformatics. https://doi.org/10.1093/bioinformatics/bty895
DESeq2::plotMA(condition_LFC, ylim=c(-2,2))
topVarGenes <- head(order(rowVars(assay(vsd)), decreasing = TRUE), 50)
mat <- assay(vsd)[ topVarGenes, ]
mat <- mat - rowMeans(mat)
anno <- as.data.frame(colData(vsd)[, c("Sex","Condition")])
pheatmap(mat, color = inferno(length(mat) - 1), annotation_col = anno, cluster_cols = F)
pheatmap(mat, annotation_col = anno)
pheatmap(mat, color = inferno(length(mat) - 1), annotation_col = anno)
pathway_folder <- 'pathways/'
fgsea_output <- list()
ranks <- list()
mart <- useDataset("mmusculus_gene_ensembl", mart=useMart("ensembl"))
bm <- getBM(attributes=c("external_gene_name", "hsapiens_homolog_associated_gene_name"), mart=mart) %>%
distinct() %>%
as_tibble() %>%
na_if("") %>%
na.omit()
# ens2symbol <- AnnotationDbi::select(org.Mm.eg.db,
# key=WT_res$row,
# columns="SYMBOL",
# keytype="ENSEMBL")
# ens2symbol <- as_tibble(ens2symbol)
# WT_res <- inner_join(WT_res, bm, by=c("row"="ensembl_gene_id"))
fGSEA_pipe <- function(res, path, title, pres=FALSE) {
res2 <- inner_join(res_1, bm, by=c("row"="external_gene_name")) %>%
select(hsapiens_homolog_associated_gene_name, stat) %>%
na.omit() %>%
distinct() %>%
group_by(hsapiens_homolog_associated_gene_name) %>%
summarize(stat=mean(stat))
ranks <- deframe(res2)
if(pres) head(ranks, 20)
if(path == "hallmark") p <- "h.all.v7.0.symbols.gmt"
else if(path == "KEGG") p <- "c2.cp.kegg.v7.0.symbols.gmt"
else if(path == "mir") p <- "c3.mir.v7.0.symbols.gmt"
else if(path == "GO_all") p <- "c5.all.v7.0.symbols.gmt"
else if(path == "GO_BP") p <- "c5.bp.v7.0.symbols.gmt"
fgseaResTidy <- fgsea(pathways=gmtPathways(paste0(pathway_folder, p)), stats=ranks, nperm=100000) %>%
as_tibble() %>%
filter(padj < 0.05) %>%
arrange(desc(NES))
p <- ggplot(fgseaResTidy, aes(reorder(pathway, NES), NES)) +
geom_col() +
coord_flip() +
labs(x="Pathway", y="Normalized Enrichment Score",
title=paste0(title, ": ", path, " pathways NES from GSEA")) +
theme_minimal()
return(p)
}
fGSEA_pipe(res_1, "hallmark", "Female SPF vs GF")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
fGSEA_pipe(res_1, "KEGG", "Female SPF vs GF")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
p <- fGSEA_pipe(res_1, "GO_BP", "Female SPF vs GF") + theme(axis.text.y = element_blank(), axis.ticks.y = element_blank())
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
ggplotly(p)
fGSEA_pipe(res_2, "hallmark", "Female 4w vs 12w")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
fGSEA_pipe(res_2, "KEGG", "Female 4w vs 12w")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
p <- fGSEA_pipe(res_2, "GO_BP", "Female 4w vs 12w") + theme(axis.text.y = element_blank(), axis.ticks.y = element_blank())
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
ggplotly(p)
fGSEA_pipe(res_3, "hallmark", "Female SPF vs 4w")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
fGSEA_pipe(res_3, "KEGG", "Female SPF vs 4w")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
p <- fGSEA_pipe(res_3, "GO_BP", "Female SPF vs 4w") + theme(axis.text.y = element_blank(), axis.ticks.y = element_blank())
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
ggplotly(p)
fGSEA_pipe(res_4, "hallmark", "Female SPF vs 12w")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
fGSEA_pipe(res_4, "KEGG", "Female SPF vs 12w")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
p <- fGSEA_pipe(res_4, "GO_BP", "Female SPF vs 12w") + theme(axis.text.y = element_blank(), axis.ticks.y = element_blank())
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
ggplotly(p)
fGSEA_pipe(res_5, "hallmark", "Male SPF vs GF")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
fGSEA_pipe(res_5, "KEGG", "Male SPF vs GF")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
p <- fGSEA_pipe(res_5, "GO_BP", "Male SPF vs GF") + theme(axis.text.y = element_blank(), axis.ticks.y = element_blank())
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
ggplotly(p)
fGSEA_pipe(res_6, "hallmark", "Male 4w vs 12w")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
fGSEA_pipe(res_6, "KEGG", "Male 4w vs 12w")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
p <- fGSEA_pipe(res_6, "GO_BP", "Male 4w vs 12w") + theme(axis.text.y = element_blank(), axis.ticks.y = element_blank())
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
ggplotly(p)
fGSEA_pipe(res_7, "hallmark", "Male SPF vs 4w")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
fGSEA_pipe(res_7, "KEGG", "Male SPF vs 4w")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
p <- fGSEA_pipe(res_7, "GO_BP", "Male SPF vs 4w") + theme(axis.text.y = element_blank(), axis.ticks.y = element_blank())
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
ggplotly(p)
fGSEA_pipe(res_8, "hallmark", "Male SPF vs 12w")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
fGSEA_pipe(res_8, "KEGG", "Male SPF vs 12w")
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
p <- fGSEA_pipe(res_8, "GO_BP", "Male SPF vs 12w") + theme(axis.text.y = element_blank(), axis.ticks.y = element_blank())
## Warning in fgsea(pathways = gmtPathways(paste0(pathway_folder, p)), stats = ranks, : There are ties in the preranked stats (5.34% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
ggplotly(p)
smanzoor@uchicago.edu